Creating dataset:

Now let’s proceed as in mash, selecting the ‘maxes’ from a set of the top 1000:

z.stat=read.table("z.matched.txt",header = TRUE)

v.j=matrix(rep(1,ncol(z.stat)*nrow(z.stat)),ncol=16,nrow=nrow(z.stat))
max.z=cbind(z.stat,maxz=apply(z.stat,1,function(x){max(abs(x))}))
max.z.sort=max.z[order(max.z[,"maxz"],decreasing=T),]
###use these strongest 1000 to build covariance matrices
maxes=max.z.sort[1:1000,-17]
image(cor(maxes))

write.table(maxes,"maxz.txt",col.names=FALSE,row.names=FALSE)

Now run SFA:

#system('/Users/sarahurbut/miniconda3/bin/sfa -gen ./maxz.txt -g 1000 -n 16 -o consortiumz i -k 5')
library('mash')

factor.mat=as.matrix(read.table("consortiumz_F.out"))
lambda.mat=as.matrix(read.table("consortiumz_lambda.out"))


library('mash')
library('ExtremeDeconvolution')

max.z=read.table("maxz.txt")
max.v=matrix(rep(1,ncol(max.z)*nrow(max.z)),ncol=16,nrow=nrow(max.z))
dim(max.z)
ms=deconvolution.em.with.bovy(max.z,factor.mat,max.v,lambda.mat,K=3,P=3)
saveRDS(ms,"maxstepbovy.rds")

Now we want to compute the covariance matrices using the max step:

ms=readRDS("maxstepbovy.rds")
A="consortium"
covmash=compute.hm.covmat.all.max.step(max.step = ms,b.hat = z.stat,se.hat = v.j,t.stat = max.z,Q = 5,lambda.mat = lambda.mat,A = "test",factor.mat = factor.mat,zero = T,power = 2)$covmat

set.seed(123)
index=sample(1:nrow(z.stat),50000,replace=F)
write.table(index,"index.txt")

index=read.table("index.txt")[,1]

train.t=z.stat[index,]
se.train=v.j[index,]



compute.hm.train.log.lik.pen(train.b = train.t,se.train = se.train,covmat = covmash,A=A,pen=1)

Let’s examine the patterns of covariance and the hierarchical weights and any patterns of tissue specificity.

Let’s look at the covariance patterns captured.

## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] 0.4838265
## [1] 0.4838265
## [1] 0.5161735
## [1] 0.5161735

We can see that 0.5936194 proportion of the prior weight is assigned to the learned matrices. Looks good!

Do we see patterns of specificity? It appears that specificity to one subgroup is very rare, since the correlation structure is so rich. Of note, schizophrenia and height stand out. Let’s look at tissue specific:

How many associations do we call significant at an LFSR of 0.05? 338655 which is 0.0257768 part of the data. When we plot t-spec by subgroup, we see that subgroup specific effects are rare and of small magnitude because of the sharing among tissues for a given fold change difference.

What about in ash?

#ashmeans=read.table("univariateash.pm.txt")
ashlfsr=read.table('univariateash.lfsr.txt')

#dim(ashmeans)
dim(ashlfsr)
## [1] 821124     16
sum(ashlfsr<0.05)
## [1] 82384
mean(ashlfsr<0.05)
## [1] 0.006270673
colnames(lfsr.mash)=names
rownames(lfsr.mash)=rownames(z.stat)
dist=lfsr.mash<0.05;
lapply(seq(1:ncol(lfsr.mash)),function(x){a=rownames(lfsr.mash)[which(rowSums(dist)==1&lfsr.mash[,x]<0.05)];write.table(a,paste0("../Data/traitspec/traitspec",colnames(lfsr.mash)[x],".txt"))})


lapply(seq(1:ncol(lfsr.mash)),function(x){a=rownames(lfsr.mash)[which(lfsr.mash[,x]<0.05)];write.table(a,paste0("../Data/significant/",colnames(lfsr.mash)[x],"significantin.txt"))})

Confirmation of Results

As an example confirmation, https://www.snpedia.com/index.php/Rs1042725 found nature SNP rs1042725 is associated with height (P = 4E-8) in a study involving over 20,000 individuals.rs6060369 was also confirmed.

Let’s see how many SNPS per trait:

barplot(colSums(lfsr.mash<0.05),main="Number of Snps Significant at LFSR<0.05")

Let’s generate the top SNPs per trait:

colnames(lfsr.mash)=colnames(z.stat)
rownames(lfsr.mash)=rownames(z.stat)
data.frame(apply(lfsr.mash,2,function(x){(rownames(lfsr.mash)[order(x,decreasing=F)][1:10])}))
##    reprogen_aam_beta pgc_scz_beta pgc_mdd_beta pgc_bip_beta pgc_as_beta
## 1          rs4976665   rs10484439   rs10182181    rs4382763   rs1269195
## 2         rs11739147   rs13198716    rs6752378   rs12075079   rs9375450
## 3           rs734518    rs7746199    rs6545814   rs10489290   rs2184968
## 4         rs17355209   rs13195040    rs6721750    rs4072117   rs2152876
## 5          rs1472920   rs10484399    rs2384058   rs12402454   rs4559102
## 6          rs2962842   rs17749927   rs13407913   rs10489289   rs4549631
## 7          rs2940521   rs17750424     rs713587   rs16844255   rs7738836
## 8          rs2860534   rs17693963    rs7567997   rs12911223   rs1538956
## 9          rs3923879   rs13199772    rs2033654    rs7296288   rs1340952
## 10         rs7718874   rs13197574   rs10198275    rs2135551   rs3861455
##    pgc_an_beta magic_fg_beta ibdgenetics_uc_beta ibdgenetics_ibd_beta
## 1     rs724016     rs2908282           rs3812558            rs6556412
## 2    rs9846396    rs10830961          rs10753575            rs3812558
## 3   rs12969709      rs477224           rs6426833            rs4487900
## 4    rs1344674    rs12475700          rs10737482           rs12413565
## 5   rs11662368     rs1402837           rs7553638            rs1468889
## 6    rs6763931      rs486981           rs4654903            rs6656890
## 7    rs6764769      rs508506          rs11805303            rs7515029
## 8   rs12964203      rs494874           rs1343151            rs4655679
## 9    rs1539952     rs2685805          rs11465817           rs10789224
## 10   rs6440003     rs6709087          rs10889677           rs11209008
##    ibdgenetics_cd_beta giant_height_beta giant_bmi_beta gefos_ls_beta
## 1            rs6556412         rs1571466       rs879048     rs6469792
## 2            rs3812558         rs3116228      rs7193144     rs7010043
## 3            rs4487900         rs3100596     rs11664327     rs6469795
## 4            rs1468889         rs1078968      rs1893512     rs9650075
## 5            rs7515029         rs6763931      rs2568958     rs4424291
## 6            rs4655679        rs11711375     rs11209951     rs7016585
## 7           rs10789224        rs11725731       rs630372     rs6469804
## 8           rs11209005        rs17427971       rs543874    rs10955924
## 9           rs11209008         rs7684769     rs10913469     rs9594738
## 10           rs6688383          rs951252      rs2867105     rs7014574
##    gefos_fn_beta gefos_fa_beta angst_anx_beta
## 1      rs6894139     rs3779381     rs12969709
## 2     rs10037512     rs3801387     rs11662368
## 3      rs1366594      rs917727      rs1539952
## 4      rs1346452      rs718766     rs12964203
## 5      rs4916669     rs7776725      rs6567155
## 6       rs984399     rs2254595     rs12957347
## 7      rs7728690     rs6965592     rs12970134
## 8      rs4916666     rs6894139     rs11584383
## 9      rs1159666    rs10260002      rs9846396
## 10    rs13436401     rs1366594      rs1150754
write.table(data.frame(apply(lfsr.mash,2,function(x){(rownames(lfsr.mash)[order(x,decreasing=F)][1:10])}))
,"top10pertrait.txt")

Let’s show how this integrates with the GTEX results:

topten=read.table("top10pertrait.txt",header=T,stringsAsFactors = F)
topten[1,"scz"]
## [1] "rs10484439"

Is an eQTL in Brain, Frontal Cortex as shown here in the GteX browser representing a possible link to disease.

Now, we’d like to add pairwise sharing by magnitude and sign:

library("mashr")
se.matched=as.matrix(read.table("se.matched.txt"))
pm.mash.beta=post.means*se.matched
colnames(pm.mash.beta)=names
lfsr.mash.sig=lfsr.mash[rowSums(lfsr.mash<0.05)>0,]##only 137,223 are significant in at least one subgroup
pm.mash.sig=pm.mash.beta[rowSums(lfsr.mash<0.05)>0,]

signheatmap=compute.sharing.by.sign(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = pm.mash.sig)
signheatmap[lower.tri(signheatmap)] <- NA
magheatmap=compute.mag.by.sharing(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = pm.mash.sig)
magheatmap[lower.tri(magheatmap)] <- NA


library('colorRamps')
library('corrplot')
library(gplots)
library(ggplot2)

class(signheatmap)
## [1] "matrix"
heatmap.2(signheatmap,Rowv=FALSE,Colv=FALSE,
          symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
          col=blue2green,main=paste0("Pairwise Sharing by Sign"),
          cexRow=0.6,cexCol=0.5,cex.main=0.5,breaks=seq(0.7,1,0.01))

library('lattice')

clrs <- colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
                               "#E0F3F8","#91BFDB","#4575B4")))(64)
#clrs[63:64] <- "darkviolet"
lat=signheatmap
lat[lower.tri(lat)] <- NA
print(levelplot(lat,col.regions = clrs,xlab = "",ylab = "",colorkey = TRUE,title(main="PairwiseSharingBySign")))

heatmap.2(magheatmap,Rowv=FALSE,Colv=FALSE,
          symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
          col=blue2green,main=paste0("Pairwise Sharing by Magnitude"),
          cexRow=0.6,cexCol=2,cex.main=0.5,#breaks=seq(0.35,1,0.01),
          labCol=NA)

library('lattice')

clrs <- colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
                               "#E0F3F8","#91BFDB","#4575B4")))(64)
#clrs[63:64] <- "darkviolet"
absmagheatmap=compute.mag.by.sharing(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = abs(pm.mash.sig))
absmagheatmap[lower.tri(absmagheatmap)] <- NA
lat=absmagheatmap
lat[lower.tri(lat)] <- NA
print(levelplot(lat,col.regions = clrs,xlab = "",ylab = "",colorkey = TRUE,title(main="PairwiseSharingByMagnitude")))

Now, let’s look at sharing my asbolute value of magntiude:

heatmap.2(signheatmap,Rowv=FALSE,Colv=FALSE,
          symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
          col=blue2green,main=paste0("Pairwise Sharing by Sign"),
          cexRow=0.6,cexCol=0.5,cex.main=0.5)

#,breaks=seq(0.7,1,0.01))

heatmap.2(1-signheatmap,Rowv=FALSE,Colv=FALSE,
          symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
          col=blue2green,main=paste0("Pairwise Sharing by Opposite Sign"),
          cexRow=0.6,cexCol=0.5,cex.main=0.5)

#,breaks=seq(0.7,1,0.01))



absmagheatmap=compute.mag.by.sharing(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = abs(pm.mash.sig))
absmagheatmap[lower.tri(absmagheatmap)] <- NA
heatmap.2(absmagheatmap,Rowv=FALSE,Colv=FALSE,
          symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
          col=blue2green,main=paste0("Pairwise Sharing by abs(Magnitude)"),
          cexRow=0.6,cexCol=2,cex.main=0.5,#breaks=seq(0.35,1,0.01),
          labCol=NA)

heatmap.2(magheatmap,Rowv=FALSE,Colv=FALSE,
          symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
          col=blue2green,main=paste0("Pairwise Sharing by Magnitude"),
          cexRow=0.6,cexCol=2,cex.main=0.5,#breaks=seq(0.35,1,0.01),
          labCol=NA)